package me.xuym.crawler.dygang.convertor;

import com.cbs.java.component.application.Application;
import me.xuym.crawler.Processor;
import me.xuym.crawler.dygang.entity.DYGangWebResult;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Scans an HTML page for linked thumbnail entries and turns each one into a
 * {@link DYGangWebResult}.
 *
 * <p>An entry is an {@code <a target="_blank">} element that directly wraps an
 * {@code <img>} declared with {@code width="120" height="150" border="0"}. The anchor's
 * {@code href} becomes the result URL and the image's {@code alt} text becomes the
 * result name.
 *
 * Created by broche on 13/09/2017.
 */
public class DYGangWebConvertor extends Processor<String, List<DYGangWebResult>> {
    /**
     * Matches one entry. Group 1 captures the anchor's {@code href}, group 2 the image's
     * {@code alt} text; the image's {@code src} is matched but not captured.
     *
     * <p>Compiled once and shared: {@link Pattern} instances are immutable and safe for
     * concurrent use, so there is no reason to recompile on every call.
     */
    private static final Pattern ITEM_PATTERN = Pattern.compile("<a href=\"([^\"]*?)\" target=\"_blank\"><img src=\"[^\"]*?\" alt=\"([^\"]*?)\" width=\"120\" height=\"150\" border=\"0\" /></a>");

    /**
     * Creates the convertor.
     *
     * @param application the application instance, handed straight to the
     *                    {@link Processor} base class
     */
    public DYGangWebConvertor(Application application) {
        super(application);
    }

    /**
     * Extracts every entry matched by {@link #ITEM_PATTERN} from the given HTML.
     *
     * @param content the HTML text to scan; must not be {@code null}
     * @return the extracted results in document order; empty (never {@code null}) when
     *         nothing matches
     * @throws NullPointerException if {@code content} is {@code null}
     */
    @Override
    protected List<DYGangWebResult> onProcess(String content) throws Exception {
        List<DYGangWebResult> results = new ArrayList<>();
        // The pattern requires the tags and attributes to be adjacent, so line breaks are
        // removed first. Both CR and LF are stripped: removing only LF would leave a stray
        // CR between tags on CRLF-terminated pages and silently drop those entries.
        String flattened = content.replace("\r", "").replace("\n", "");
        Matcher itemMatcher = ITEM_PATTERN.matcher(flattened);
        while (itemMatcher.find()) {
            DYGangWebResult result = new DYGangWebResult();
            result.setName(itemMatcher.group(2));
            result.setUrl(itemMatcher.group(1));
            results.add(result);
        }
        return results;
    }
}
